/****************************************************************************************************
* Title: Workhorses of Opportunity, Howard and Weinstein
*
* Inputs:
*   - justnormasylum.dta
*   - county_outcomes_dta.dta
*
* Outputs:
*   - westfall_young.dta (Romano-Wolf procedure results)
*   - adjusted_pvalues.pdf (graph of Romano-Wolf and BKY q-values)
*   - unadjusted_pvalues.pdf (histogram of unadjusted p-values)
*
* Description:
*   This script estimates the effect of being from a normal school county on various outcomes
*   across the income distribution using multiple testing corrections (Romano-Wolf and BKY).
****************************************************************************************************/

* Load and prepare normal school indicator
clear 
use "justnormasylum"
rename hasnormalschool hasnormal
keep hasnormal cty_fips 
tempfile normal
save `normal'

* Set up environment
clear
clear matrix
clear mata
set maxvar 15000

* Load Chetty data and merge with normal school indicator
clear
use county_outcomes_dta
gen cty_fips = state * 1000 + county
rename state statefips
rename county countychetty
merge 1:m cty_fips using `normal', keep(3)

* Recode female-specific teen birth variable to pooled format
foreach percentile in p1 p25 p50 p75 p100 {
	gen teenbrth_pooled_pooled_`percentile' = teenbrth_pooled_female_`percentile'
}

* Rename pooled variables to shorter names
rename *_pooled_pooled_* *_pp_*

* Run Romano-Wolf multiple testing correction on treatment effects
randcmd ///
	((hasnormal) reg hs_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg hs_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg hs_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg hs_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg hs_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg coll_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg coll_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg coll_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg coll_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg coll_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg somecoll_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg somecoll_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg somecoll_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg somecoll_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg somecoll_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg married_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg married_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg married_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg married_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg married_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg teenbrth_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg teenbrth_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg teenbrth_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg teenbrth_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg teenbrth_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg jail_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg jail_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg jail_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg jail_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg jail_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg staycz_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg staycz_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg staycz_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg staycz_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg staycz_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg stayhome_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg stayhome_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg stayhome_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg stayhome_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg stayhome_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg working_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg working_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg working_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg working_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg working_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kir_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kir_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kir_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kir_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kir_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kfr_pp_p1 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kfr_pp_p25 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kfr_pp_p50 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kfr_pp_p75 hasnormal, absorb(statefips) cluster(statefips)) ///
	((hasnormal) reg kfr_pp_p100 hasnormal, absorb(statefips) cluster(statefips)) ///
	, treatvars(hasnormal) strata(statefips) saving(westfall_young, replace)

* Run Romano-Wolf joint test
timer on 1
rwolf hs_pp_p1 coll_pp_p1 somecoll_pp_p1 married_pp_p1 teenbrth_pp_p1 jail_pp_p1 staycz_pp_p1 stayhome_pp_p1 working_pp_p1 kfr_pp_p1 kir_pp_p1 ///
  hs_pp_p25 coll_pp_p25 somecoll_pp_p25 married_pp_p25 teenbrth_pp_p25 jail_pp_p25 staycz_pp_p25 stayhome_pp_p25 working_pp_p25 kfr_pp_p25 kir_pp_p25 ///
  hs_pp_p50 coll_pp_p50 somecoll_pp_p50 married_pp_p50 teenbrth_pp_p50 jail_pp_p50 staycz_pp_p50 stayhome_pp_p50 working_pp_p50 kfr_pp_p50 kir_pp_p50 ///
  hs_pp_p75 coll_pp_p75 somecoll_pp_p75 married_pp_p75 teenbrth_pp_p75 jail_pp_p75 staycz_pp_p75 stayhome_pp_p75 working_pp_p75 kfr_pp_p75 kir_pp_p75 ///
  hs_pp_p100 coll_pp_p100 somecoll_pp_p100 married_pp_p100 teenbrth_pp_p100 jail_pp_p100 staycz_pp_p100 stayhome_pp_p100 working_pp_p100 kfr_pp_p100 kir_pp_p100 ///
  , indepvar(hasnormal) method(regress) controls(i.statefips) seed(7) cluster(statefips) vce(cluster statefips) reps(1000)
timer off 1
timer list 1

* Export Romano-Wolf p-values
mat pval = e(RW)
clear
svmat pval
keep pval1
rename pval1 pval

* Compute BKY (2006) sharpened q-values
gen original_sorting_order = _n
sort pval
gen rank = _n if pval != .
local totalpvals = _N
gen bky06_qval = 1 if pval != .

local qval = 1
quietly {
	while `qval' > 0 {
		local qval_adj = `qval' / (1 + `qval')
		gen fdr_temp1 = `qval_adj' * rank / `totalpvals'
		gen reject_temp1 = (fdr_temp1 >= pval) if pval != .
		gen reject_rank1 = reject_temp1 * rank
		egen total_rejected1 = max(reject_rank1)

		local qval_2st = `qval_adj' * (`totalpvals' / (`totalpvals' - total_rejected1[1]))
		gen fdr_temp2 = `qval_2st' * rank / `totalpvals'
		gen reject_temp2 = (fdr_temp2 >= pval) if pval != .
		gen reject_rank2 = reject_temp2 * rank
		egen total_rejected2 = max(reject_rank2)

		replace bky06_qval = `qval' if rank <= total_rejected2 & rank != .
		drop fdr_temp* reject_temp* reject_rank* total_rejected*
		local qval = `qval' - 0.001
	}
}
sort original_sorting_order

* Plot adjusted vs. unadjusted p-values
// scatter pval3 bk pval, legend(label(1 "Romano-Wolf (2010) p-values") label(2 "BKY (2006) q-values")) ///
// 	yline(.05 .1, lcolor(gs12) lpattern(dash)) yscale(range(0)) ylabel(#5)
// graph rename adjusted_pvalues, replace
// graph export "adjusted_pvalues.pdf", as(pdf) replace

* Plot histogram of raw p-values
hist pval, start(0) width(.05) percent xtitle("p-value") xlabel(0(.1)1)
graph rename unadjusted_pvalues, replace
graph export "unadjusted_pvalues.pdf", as(pdf) replace
